HW 02

Author

Meredith Jean-Baptiste

Published

May 30, 2025

1 - A new day, a new plot, a new geom

#SETUP for THEMES 
  if (!require("pacman"))
    install.packages("pacman")
Loading required package: pacman
pacman::p_load(here)
pacman::p_load(tidyverse, colorspace, palmerpenguins, fs, lubridate, scales, openintro, gghighlight, glue, ggridges, dplyr, tidyr, forcats)

ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))

options(width = 65)

knitr::opts_chunk$set(
  fig.width = 7,        # 7" width
  fig.asp = 0.618,      # the golden ratio
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300             # higher dpi, sharper image
)

# Install dsbox from GitHub only when it is missing, so rendering the document
# does not contact GitHub (or need devtools) on every run.
if (!requireNamespace("dsbox", quietly = TRUE)) {
  devtools::install_github("tidyverse/dsbox")
}
Skipping install of 'dsbox' from a github remote, the SHA1 (244ecdfe) has not changed since last install.
  Use `force = TRUE` to force installation
# Attach dsbox; presumably this is what makes the `edibnb` dataset available.
library(dsbox)
# Keep a copy of the dataset under a second name.
# NOTE(review): the plotting code visible in this document uses `edibnb`
# directly and never reads this copy - confirm whether it is still needed.
my_edibnb_data <- edibnb

# Ridge plot of Airbnb review scores for Edinburgh neighbourhoods, with the
# neighbourhoods ordered by their median review score.
edibnb %>%
  # Listings without a rating cannot contribute to a density or to a median;
  # the ridge stat would discard them anyway (with a warning), so drop them
  # before computing the ordering.
  filter(!is.na(review_scores_rating)) %>%
  # fct_reorder() sorts the levels of its first argument by a per-level
  # summary of the second; the median is requested explicitly so the
  # ordering rule is visible in the code.
  mutate(
    neighbourhood = fct_reorder(
      neighbourhood,
      review_scores_rating,
      .fun = median
    )
  ) %>%
  ggplot(aes(x = review_scores_rating, y = neighbourhood)) +
  geom_density_ridges() +
  labs(
    x = "Review score ratings",
    y = "Neighbourhood",
    title = "AirBNB review score ratings by Edinburgh neighbourhoods",
    caption = "Source: TidyTuesday"
  )
Picking joint bandwidth of 1.21
Warning: Removed 2177 rows containing non-finite outside the scale range
(`stat_density_ridges()`).

#view(edibnb)
#glimpse(edibnb)

2 - Foreign Connected PACs

#| label: SETUP
#SETUP for THEMES 
  if (!require("pacman"))
    install.packages("pacman")
pacman::p_load(here)
pacman::p_load(tidyverse, colorspace, palmerpenguins, fs, lubridate, scales, openintro, gghighlight, glue, ggridges, dplyr, tidyr)
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))
options(width = 65)
knitr::opts_chunk$set(
  fig.width = 7,        # 7" width
  fig.asp = 0.618,      # the golden ratio
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300             # higher dpi, sharper image
)

# Collect every path under data/ that matches "Foreign Connected PAC".
list_of_files <- fs::dir_ls(path = "data", regexp = "Foreign Connected PAC")
# Read all matched files into a single tibble. `id = "year"` records each
# row's source file path in a column called `year`; the wrangling step
# further down reduces that path to the year itself.
pac <- list_of_files %>%
  read_csv(id = "year")
Rows: 2394 Columns: 6
── Column specification ─────────────────────────────────────────
Delimiter: ","
chr (5): PAC Name (Affiliate), Country of Origin/Parent Compa...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the snake_case column names janitor would produce. The result is
# only printed, not assigned, so `pac` keeps its original column names.
janitor::clean_names(pac)
# A tibble: 2,394 × 6
   year     pac_name_affiliate country_of_origin_pa…¹ total dems 
   <chr>    <chr>              <chr>                  <chr> <chr>
 1 data/Fo… 7-Eleven           Japan/Ito-Yokado       $8500 $1500
 2 data/Fo… ABB Group          Switzerland/Asea Brow… $460… $170…
 3 data/Fo… Accenture          UK/Accenture plc       $759… $230…
 4 data/Fo… ACE INA            UK/ACE Group           $385… $125…
 5 data/Fo… Acuson Corp (Siem… Germany/Siemens AG     $2000 $2000
 6 data/Fo… Adtranz (DaimlerC… Germany/DaimlerChrysl… $105… $100…
 7 data/Fo… AE Staley Manufac… UK/Tate & Lyle         $240… $100…
 8 data/Fo… AEGON USA (AEGON … Netherlands/Aegon NV   $582… $105…
 9 data/Fo… AIM Management Gr… UK/AMVESCAP            $250… $100…
10 data/Fo… Air Liquide Ameri… France/L'Air Liquide … $0    $0   
# ℹ 2,384 more rows
# ℹ abbreviated name: ¹​country_of_origin_parent_company
# ℹ 1 more variable: repubs <chr>
# Convert the party totals from text such as "$8500" to numbers. Both the
# dollar sign and any thousands separators are stripped, because as.numeric()
# returns NA for a string that still contains either character.
pac <- pac %>%
  mutate(
    across(
      c(Dems, Repubs),
      \(amount) as.numeric(gsub("[$,]", "", amount))
    )
  )

# Note: another way to format a date into just the year:
# pac$year = format(pac$year, "%Y")

# Data wrangling: derive `country` and a numeric `year`, rename the party
# columns, and keep only the UK-connected PACs.
pac <- pac %>%
  mutate(
    # "Country/Parent company": keep the part before the first "/".
    country = str_split_fixed(`Country of Origin/Parent Company`, "/", 2)[, 1],
    # `year` holds the source file path. Drop everything from the first "."
    # onward, then everything up to and including the first "-"; what is left
    # is converted to a number.
    year = as.numeric(sub("^[^-]*-", "", sub("[.].*$", "", year)))
  ) %>%
  # rename() replaces the superseded rename_at(). The shared "Party" prefix is
  # what the later pivot_longer(starts_with("Party")) call selects on.
  rename(Party_D = Dems, Party_R = Repubs) %>%
  filter(country == "UK") %>%
  # Keep, in this order, only the columns the plot needs.
  select(country, year, Party_D, Party_R)

#add the totals per year (did not work)
#pac %>% 
 # tapply(pac$Party_D, pac$year, function = sum)+
  #tapply(pac$Party_R, pac$year, function = sum)
#this didnt work either
#pac <- pac %>% 
#  group_by(year) %>% 
#  summarise(Party_D)+
#  summarise(Party_R)

#neither did this
#summarizing the amount per year
 #group_by(year) %>% 
  #summarise(amount = sum(amount))%>% 

# Reshape to one row per (PAC, party) amount, then total the amounts for each
# party within each year.
pac <- pac %>%
  pivot_longer(
    cols = starts_with("Party"),
    names_to = "party",
    values_to = "amount",
    values_drop_na = TRUE
  ) %>%
  group_by(year, party) %>%
  # `.groups = "drop"` returns an ungrouped tibble and silences the
  # "grouped output" message, so later verbs are not applied per year
  # by accident.
  summarize(total_contributions = sum(amount), .groups = "drop")
`summarise()` has grouped output by 'year'. You can override
using the `.groups` argument.
# Yearly contribution totals per party, one line per party.
pac %>%
  ggplot(aes(x = year, y = total_contributions, color = party)) +
  # A single line layer: drawing the same lines twice only overplots them.
  geom_line(linewidth = 1) +
  theme_classic() +
  scale_color_manual(
    # Colours are matched to the party codes by name rather than by position.
    values = c(Party_D = "blue", Party_R = "red"),
    labels = c("Democrat", "Republican")
  ) +
  scale_y_continuous(
    breaks = seq(from = 0, to = 3000000, by = 1000000),
    # label_comma() is the current replacement for comma_format().
    labels = label_comma()
  ) +
  labs(
    y = "Total amount",
    x = "Year",
    title = "Contributions to US political parties from UK-connected PACs",
    caption = "OpenSecrets.org"
  )

#view(pac)
#glimpse(pac)

2b - Foreign Connected PACs

#| label: SETUP
#SETUP for THEMES 
  if (!require("pacman"))
    install.packages("pacman")
pacman::p_load(here)
pacman::p_load(tidyverse, colorspace, palmerpenguins, fs, lubridate, scales, openintro, gghighlight, glue, ggridges, dplyr, tidyr)
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))
options(width = 65)
knitr::opts_chunk$set(
  fig.width = 7,        # 7" width
  fig.asp = 0.618,      # the golden ratio
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300             # higher dpi, sharper image
)

# Collect every path under data/ that matches "Foreign Connected PAC".
list_of_files <- fs::dir_ls(path = "data", regexp = "Foreign Connected PAC")
# Read all matched files into a single tibble. `id = "year"` records each
# row's source file path in a column called `year`; the wrangling step
# further down reduces that path to the year itself.
pac <- list_of_files %>%
  read_csv(id = "year")
Rows: 2394 Columns: 6
── Column specification ─────────────────────────────────────────
Delimiter: ","
chr (5): PAC Name (Affiliate), Country of Origin/Parent Compa...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the snake_case column names janitor would produce. The result is
# only printed, not assigned, so `pac` keeps its original column names.
janitor::clean_names(pac)
# A tibble: 2,394 × 6
   year     pac_name_affiliate country_of_origin_pa…¹ total dems 
   <chr>    <chr>              <chr>                  <chr> <chr>
 1 data/Fo… 7-Eleven           Japan/Ito-Yokado       $8500 $1500
 2 data/Fo… ABB Group          Switzerland/Asea Brow… $460… $170…
 3 data/Fo… Accenture          UK/Accenture plc       $759… $230…
 4 data/Fo… ACE INA            UK/ACE Group           $385… $125…
 5 data/Fo… Acuson Corp (Siem… Germany/Siemens AG     $2000 $2000
 6 data/Fo… Adtranz (DaimlerC… Germany/DaimlerChrysl… $105… $100…
 7 data/Fo… AE Staley Manufac… UK/Tate & Lyle         $240… $100…
 8 data/Fo… AEGON USA (AEGON … Netherlands/Aegon NV   $582… $105…
 9 data/Fo… AIM Management Gr… UK/AMVESCAP            $250… $100…
10 data/Fo… Air Liquide Ameri… France/L'Air Liquide … $0    $0   
# ℹ 2,384 more rows
# ℹ abbreviated name: ¹​country_of_origin_parent_company
# ℹ 1 more variable: repubs <chr>
# Convert the party totals from text such as "$8500" to numbers. Both the
# dollar sign and any thousands separators are stripped, because as.numeric()
# returns NA for a string that still contains either character.
pac <- pac %>%
  mutate(
    across(
      c(Dems, Repubs),
      \(amount) as.numeric(gsub("[$,]", "", amount))
    )
  )

# Note: another way to format a date into just the year:
# pac$year = format(pac$year, "%Y")

# Data wrangling: derive `country` and a numeric `year`, rename the party
# columns, and keep only the German-connected PACs.
pac <- pac %>%
  mutate(
    # "Country/Parent company": keep the part before the first "/".
    country = str_split_fixed(`Country of Origin/Parent Company`, "/", 2)[, 1],
    # `year` holds the source file path. Drop everything from the first "."
    # onward, then everything up to and including the first "-"; what is left
    # is converted to a number.
    year = as.numeric(sub("^[^-]*-", "", sub("[.].*$", "", year)))
  ) %>%
  # rename() replaces the superseded rename_at(). The shared "Party" prefix is
  # what the later pivot_longer(starts_with("Party")) call selects on.
  rename(Party_D = Dems, Party_R = Repubs) %>%
  filter(country == "Germany") %>%
  # Keep, in this order, only the columns the plot needs.
  select(country, year, Party_D, Party_R)

# Reshape to one row per (PAC, party) amount, then total the amounts for each
# party within each year.
pac <- pac %>%
  pivot_longer(
    cols = starts_with("Party"),
    names_to = "party",
    values_to = "amount",
    values_drop_na = TRUE
  ) %>%
  group_by(year, party) %>%
  # `.groups = "drop"` returns an ungrouped tibble and silences the
  # "grouped output" message, so later verbs are not applied per year
  # by accident.
  summarize(total_contributions = sum(amount), .groups = "drop")
`summarise()` has grouped output by 'year'. You can override
using the `.groups` argument.
# Yearly contribution totals per party, one line per party.
pac %>%
  ggplot(aes(x = year, y = total_contributions, color = party)) +
  # A single line layer: drawing the same lines twice only overplots them.
  geom_line(linewidth = 1) +
  theme_classic() +
  scale_color_manual(
    # Colours are matched to the party codes by name rather than by position.
    values = c(Party_D = "blue", Party_R = "red"),
    labels = c("Democrat", "Republican")
  ) +
  scale_y_continuous(
    breaks = seq(from = 0, to = 3000000, by = 500000),
    # label_comma() is the current replacement for comma_format().
    labels = label_comma()
  ) +
  labs(
    y = "Total amount",
    x = "Year",
    title = "Contributions to US political parties from German-connected PACs",
    caption = "OpenSecrets.org"
  )

#view(pac)
#glimpse(pac)

3 - Median housing prices in the US

#SETUP for THEMES 
  if (!require("pacman"))
    install.packages("pacman")
pacman::p_load(here)
pacman::p_load(tidyverse, colorspace, palmerpenguins, fs, lubridate, scales, openintro, gghighlight, glue, ggridges, dplyr)

ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))

options(width = 65)

knitr::opts_chunk$set(
  fig.width = 7,        # 7" width
  fig.asp = 0.618,      # the golden ratio
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300             # higher dpi, sharper image
)

# Read the median housing price series from data/median-housing.csv; here()
# builds the path from the project root.
housing <- here("data", "median-housing.csv") %>%
  read_csv()
Rows: 234 Columns: 2
── Column specification ─────────────────────────────────────────
Delimiter: ","
dbl  (1): MSPUS
date (1): DATE

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# read_csv() already parses DATE as a <date> column, so it must not be passed
# through mdy(): that parser expects month-day-year text and turns every value
# into NA. Only confirm the type the rest of the analysis relies on.
stopifnot(inherits(housing$DATE, "Date"))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `date = mdy(DATE)`.
Caused by warning:
! All formats failed to parse. No formats found.
# A tibble: 234 × 3
   DATE       MSPUS date  
   <date>     <dbl> <date>
 1 1963-01-01 17800 NA    
 2 1963-04-01 18000 NA    
 3 1963-07-01 17900 NA    
 4 1963-10-01 18500 NA    
 5 1964-01-01 18500 NA    
 6 1964-04-01 18900 NA    
 7 1964-07-01 18900 NA    
 8 1964-10-01 19400 NA    
 9 1965-01-01 20200 NA    
10 1965-04-01 19800 NA    
# ℹ 224 more rows
# Give the columns descriptive names and derive calendar parts from the date.
housing <- housing %>%
  # rename() replaces the superseded rename_at().
  rename(price = MSPUS, date = DATE) %>%
  mutate(
    # lubridate accessors return numbers directly, so no string splitting or
    # as.numeric() conversion is needed. `month` is the numeric month (1-12);
    # splitting the date text on "-" would leave "MM-DD" in it instead.
    year = year(date),
    month = month(date)
  )

# Define x-axis break interval as 10 years (did not work)
#year_min <- min(housing$year, na.rm = TRUE)
#year_max <- max(housing$year, na.rm = TRUE)

#If I put the x=date, the line is correct but the labels are all piled up/ OR the error message "Can't convert `x` <date> to <double>.".
#If I put x= year, the line is choppy but the labels are correct
#Finally resolved this issue using scale_x_date with breaks
# Line chart of the median sales price over time.
housing %>%
  ggplot(aes(x = date, y = price)) +
  geom_line(color = "blue") +
  # A date scale keeps the line continuous while placing a label every
  # five years.
  scale_x_date(date_breaks = "5 years", date_labels = "%Y") +
  scale_y_continuous(
    breaks = seq(from = 0, to = 440000, by = 40000),
    # label_comma() is the current replacement for comma_format().
    labels = label_comma()
  ) +
  labs(
    x = "",
    y = "Dollars",
    title = "Median sales price of houses sold in the United States",
    subtitle = "Not seasonally adjusted",
    caption = "Sources: Census;HUD"
  ) +
  # Left-align title and subtitle with the whole plot. Negative hjust values
  # achieve this only for one particular figure width and font size.
  theme(plot.title.position = "plot")

#glimpse(housing)
#view(housing)

3b - Median housing prices in the US: RECESSION LINES

#SETUP for THEMES 
  if (!require("pacman"))
    install.packages("pacman")
pacman::p_load(here)
pacman::p_load(tidyverse, colorspace, palmerpenguins, fs, lubridate, scales, openintro, gghighlight, glue, ggridges, dplyr)

ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))

options(width = 65)

knitr::opts_chunk$set(
  fig.width = 7,        # 7" width
  fig.asp = 0.618,      # the golden ratio
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300             # higher dpi, sharper image
)

# Read the median housing price series from data/median-housing.csv; here()
# builds the path from the project root.
housing <- here("data", "median-housing.csv") %>%
  read_csv()
Rows: 234 Columns: 2
── Column specification ─────────────────────────────────────────
Delimiter: ","
dbl  (1): MSPUS
date (1): DATE

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# read_csv() already parses DATE as a <date> column, so it must not be passed
# through mdy(): that parser expects month-day-year text and turns every value
# into NA. Only confirm the type the rest of the analysis relies on.
stopifnot(inherits(housing$DATE, "Date"))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `date = mdy(DATE)`.
Caused by warning:
! All formats failed to parse. No formats found.
# A tibble: 234 × 3
   DATE       MSPUS date  
   <date>     <dbl> <date>
 1 1963-01-01 17800 NA    
 2 1963-04-01 18000 NA    
 3 1963-07-01 17900 NA    
 4 1963-10-01 18500 NA    
 5 1964-01-01 18500 NA    
 6 1964-04-01 18900 NA    
 7 1964-07-01 18900 NA    
 8 1964-10-01 19400 NA    
 9 1965-01-01 20200 NA    
10 1965-04-01 19800 NA    
# ℹ 224 more rows
# Give the columns descriptive names and derive calendar parts from the date.
housing <- housing %>%
  # rename() replaces the superseded rename_at().
  rename(price = MSPUS, date = DATE) %>%
  mutate(
    # lubridate accessors return numbers directly, so no string splitting or
    # as.numeric() conversion is needed. `month` is the numeric month (1-12);
    # splitting the date text on "-" would leave "MM-DD" in it instead.
    year = year(date),
    month = month(date)
  )

#housing %>% 
#  mutate(recession = if_else(price>, "TRUE", "FALSE"))


# Define x-axis break interval as 10 years (did not work)
#year_min <- min(housing$date, na.rm = TRUE)
#year_max <- max(housing$date, na.rm = TRUE)

#If I put the x=date, the line is correct but the labels are all piled up.
#If I put x= year, the line is choppy but the labels are correct
# Line chart of the median sales price with U.S. recessions shaded, as the
# caption states.
housing %>%
  ggplot(aes(x = date, y = price)) +
  # Recession bands are drawn first so the price line stays on top of them.
  # Dates are NBER business-cycle peak and trough months, using the first day
  # of each month.
  # NOTE(review): confirm against the recession dates supplied with the
  # assignment, if any.
  geom_rect(
    data = tibble(
      start = as.Date(c(
        "1969-12-01", "1973-11-01", "1980-01-01", "1981-07-01",
        "1990-07-01", "2001-03-01", "2007-12-01", "2020-02-01"
      )),
      end = as.Date(c(
        "1970-11-01", "1975-03-01", "1980-07-01", "1982-11-01",
        "1991-03-01", "2001-11-01", "2009-06-01", "2020-04-01"
      ))
    ),
    aes(xmin = start, xmax = end),
    # The bands span the full height of the panel.
    ymin = -Inf,
    ymax = Inf,
    # The band data has no `date`/`price` columns, so the plot-level mapping
    # must not be inherited.
    inherit.aes = FALSE,
    fill = "grey85"
  ) +
  geom_line(color = "blue") +
  scale_x_date(date_breaks = "5 years", date_labels = "%Y") +
  scale_y_continuous(
    breaks = seq(from = 0, to = 440000, by = 40000),
    # label_comma() is the current replacement for comma_format().
    labels = label_comma()
  ) +
  labs(
    x = "",
    y = "Dollars",
    title = "Median sales price of houses sold in the United States",
    subtitle = "Not seasonally adjusted",
    caption = "Shaded areas indicate U.S. recessions \nSources: Census; HUD"
  ) +
  # Left-align title and subtitle with the whole plot. Negative hjust values
  # achieve this only for one particular figure width and font size.
  theme(plot.title.position = "plot")

3C - Subset of Median Housing 2019 - 2020

#SETUP for THEMES 
  if (!require("pacman"))
    install.packages("pacman")
pacman::p_load(here)
pacman::p_load(tidyverse, colorspace, palmerpenguins, fs, lubridate, scales, openintro, gghighlight, glue, ggridges, dplyr)

ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))

options(width = 65)

knitr::opts_chunk$set(
  fig.width = 7,        # 7" width
  fig.asp = 0.618,      # the golden ratio
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300             # higher dpi, sharper image
)

# Read the median housing price series from data/median-housing.csv; here()
# builds the path from the project root.
housing <- here("data", "median-housing.csv") %>%
  read_csv()
Rows: 234 Columns: 2
── Column specification ─────────────────────────────────────────
Delimiter: ","
dbl  (1): MSPUS
date (1): DATE

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# read_csv() already parses DATE as a <date> column, so it must not be passed
# through mdy(): that parser expects month-day-year text and turns every value
# into NA. Only confirm the type the rest of the analysis relies on.
stopifnot(inherits(housing$DATE, "Date"))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `date = mdy(DATE)`.
Caused by warning:
! All formats failed to parse. No formats found.
# A tibble: 234 × 3
   DATE       MSPUS date  
   <date>     <dbl> <date>
 1 1963-01-01 17800 NA    
 2 1963-04-01 18000 NA    
 3 1963-07-01 17900 NA    
 4 1963-10-01 18500 NA    
 5 1964-01-01 18500 NA    
 6 1964-04-01 18900 NA    
 7 1964-07-01 18900 NA    
 8 1964-10-01 19400 NA    
 9 1965-01-01 20200 NA    
10 1965-04-01 19800 NA    
# ℹ 224 more rows
# Give the columns descriptive names and derive calendar parts from the date.
housing <- housing %>%
  # rename() replaces the superseded rename_at().
  rename(price = MSPUS, date = DATE) %>%
  mutate(
    # lubridate accessors return numbers directly, so no string splitting or
    # as.numeric() conversion is needed. `month` is the numeric month (1-12);
    # splitting the date text on "-" would leave "MM-DD" in it instead.
    year = year(date),
    month = month(date)
  )

# Median sales price for 2019 and 2020 only, one point per observation.
housing %>%
  filter(date > ymd(20181231), date < ymd(20210101)) %>%
  ggplot(aes(x = date, y = price)) +
  geom_line(color = "blue") +
  # Shape 21 is a circle with separate outline and fill colours; shape 1 is an
  # outline only and ignores `fill`.
  geom_point(color = "blue", fill = "white", shape = 21) +
  # `date` is a Date, so the axis needs a date scale (a discrete scale cannot
  # label it). One break at the start of each quarter, labelled "Q1".."Q4".
  scale_x_date(
    breaks = seq(ymd(20190101), ymd(20201001), by = "3 months"),
    labels = \(d) paste0("Q", quarter(d))
  ) +
  scale_y_continuous(
    breaks = seq(from = 280000, to = 380000, by = 20000),
    # label_comma() is the current replacement for comma_format().
    labels = label_comma()
  ) +
  labs(
    x = "2019                                2020",
    y = "Dollars",
    title = "Median sales price of houses sold in the United States",
    subtitle = "Not seasonally adjusted",
    caption = ""
  ) +
  # Left-align title and subtitle with the whole plot. Negative hjust values
  # achieve this only for one particular figure width and font size.
  theme(plot.title.position = "plot")

4 - Expect More. Plot More.

#SETUP for THEMES 
  if (!require("pacman"))
    install.packages("pacman")
pacman::p_load(here)
pacman::p_load(tidyverse, colorspace, palmerpenguins, fs, lubridate, scales, openintro, gghighlight, glue, ggridges, dplyr)

ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))

options(width = 65)

knitr::opts_chunk$set(
  fig.width = 7,        # 7" width
  fig.asp = 0.618,      # the golden ratio
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300             # higher dpi, sharper image
)

library(ggplot2)
# One equal-height bar per ring of the logo. Columns are named with `=`:
# using `<-` inside data.frame() creates global variables and leaves the
# columns with mangled, auto-generated names.
df <- data.frame(
  subject = c("A", "B", "C"),
  value = c(100, 100, 100)
)

# Every aesthetic now refers to a column of `df`.
ggplot(df, aes(x = subject, y = value, fill = subject)) +
  scale_fill_manual(values = c("red", "white", "red")) +
  geom_col() +
  scale_x_discrete(limits = c("A", "B", "C")) +
  # Mapping y to the angle turns each bar into a ring.
  coord_polar("y") +
  # theme_void() removes axes, labels and background.
  theme_void() +
  labs(
    x = "",
    y = "",
    caption = "TARGET"
  ) +
  theme(
    legend.position = "none",
    plot.caption = element_text(
      color = "red",
      size = 36,
      face = "bold",
      hjust = 0.5
    )
  )

5 - Mirror, mirror on the wall, who’s the ugliest of them all?

#SETUP for THEMES 
  if (!require("pacman"))
    install.packages("pacman")
pacman::p_load(here)
pacman::p_load(tidyverse, colorspace, palmerpenguins, fs, lubridate, scales, openintro, gghighlight, glue, ggridges, dplyr)

ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))

options(width = 65)

knitr::opts_chunk$set(
  fig.width = 7,        # 7" width
  fig.asp = 0.618,      # the golden ratio
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300             # higher dpi, sharper image
)
# Print the penguins tibble to inspect its columns and types.
palmerpenguins::penguins
# A tibble: 344 × 8
   species island  bill_length_mm bill_depth_mm flipper_length_mm
   <fct>   <fct>            <dbl>         <dbl>             <int>
 1 Adelie  Torger…           39.1          18.7               181
 2 Adelie  Torger…           39.5          17.4               186
 3 Adelie  Torger…           40.3          18                 195
 4 Adelie  Torger…           NA            NA                  NA
 5 Adelie  Torger…           36.7          19.3               193
 6 Adelie  Torger…           39.3          20.6               190
 7 Adelie  Torger…           38.9          17.8               181
 8 Adelie  Torger…           39.2          19.6               195
 9 Adelie  Torger…           34.1          18.1               193
10 Adelie  Torger…           42            20.2               190
# ℹ 334 more rows
# ℹ 3 more variables: body_mass_g <int>, sex <fct>, year <int>
# Show the data with janitor-cleaned column names. The result is only
# printed, not assigned, so `penguins` itself is unchanged.
janitor::clean_names(penguins)
# A tibble: 344 × 8
   species island  bill_length_mm bill_depth_mm flipper_length_mm
   <fct>   <fct>            <dbl>         <dbl>             <int>
 1 Adelie  Torger…           39.1          18.7               181
 2 Adelie  Torger…           39.5          17.4               186
 3 Adelie  Torger…           40.3          18                 195
 4 Adelie  Torger…           NA            NA                  NA
 5 Adelie  Torger…           36.7          19.3               193
 6 Adelie  Torger…           39.3          20.6               190
 7 Adelie  Torger…           38.9          17.8               181
 8 Adelie  Torger…           39.2          19.6               195
 9 Adelie  Torger…           34.1          18.1               193
10 Adelie  Torger…           42            20.2               190
# ℹ 334 more rows
# ℹ 3 more variables: body_mass_g <int>, sex <fct>, year <int>
# Show the rows that have a recorded body mass. The result is only printed,
# not assigned, so `penguins` still contains the rows with missing values.
drop_na(penguins, body_mass_g)
# A tibble: 342 × 8
   species island  bill_length_mm bill_depth_mm flipper_length_mm
   <fct>   <fct>            <dbl>         <dbl>             <int>
 1 Adelie  Torger…           39.1          18.7               181
 2 Adelie  Torger…           39.5          17.4               186
 3 Adelie  Torger…           40.3          18                 195
 4 Adelie  Torger…           36.7          19.3               193
 5 Adelie  Torger…           39.3          20.6               190
 6 Adelie  Torger…           38.9          17.8               181
 7 Adelie  Torger…           39.2          19.6               195
 8 Adelie  Torger…           34.1          18.1               193
 9 Adelie  Torger…           42            20.2               190
10 Adelie  Torger…           37.8          17.1               186
# ℹ 332 more rows
# ℹ 3 more variables: body_mass_g <int>, sex <fct>, year <int>
# Deliberately ugly plot of body mass by species.
penguins %>%
  # Drop the rows without a body mass inside the pipeline that feeds the plot,
  # so no layer has to discard them with a warning.
  drop_na(body_mass_g) %>%
  ggplot(aes(x = species, y = body_mass_g, colour = island)) +
  # `size` is mapped on the points only: mapping it at the plot level also
  # hands it to geom_line(), where it is deprecated in favour of `linewidth`.
  geom_point(aes(size = body_mass_g)) +
  geom_line(linewidth = 2, color = "green") +
  theme_dark() +
  scale_color_manual(values = c("purple", "red", "yellow")) +
  labs(
    title = "Ugliest of them All",
    subtitle = "Worst plot colors ever",
    caption = "Source: Palmerpenguins, Tidytuesday"
  ) +
  theme(
    plot.title = element_text(
      color = "red",
      size = 18,
      face = "bold",
      hjust = 0
    ),
    plot.subtitle = element_text(color = "purple", size = 10, face = "bold")
  )
Warning: Using `size` aesthetic for lines was deprecated in ggplot2
3.4.0.
ℹ Please use `linewidth` instead.
Warning: Removed 2 rows containing missing values or values outside the
scale range (`geom_point()`).

Sources

Question 1

#ggridges citations=

#https://wilkelab.org/ggridges/articles/introduction.html

#https://wilkelab.org/ggridges/

#https://stackoverflow.com/questions/11857935/plot-the-average-values-for-each-level

#https://r-graph-gallery.com/267-reorder-a-variable-in-ggplot2.html

Question 2

#citations for how to remove the dollar sign, how to split columns into two, how to pivot long, filtering, summarizing by year and adjusting the labels for the legend=

#https://www.statology.org/remove-dollar-sign-in-r/

#https://www.statology.org/split-column-in-r/

#https://stackoverflow.com/questions/30808474/r-data-wrangling-for-emails

#https://www.statology.org/rename-single-column-in-r/

#R for Data Science chapter 5: https://r4ds.hadley.nz/data-tidy.html

#https://tidyr.tidyverse.org/reference/pivot_longer.html

#https://dplyr.tidyverse.org/reference/filter.html

#https://stackoverflow.com/questions/53808561/how-to-add-points-and-lines-in-ggplot-for-sums-by-years

#https://stackoverflow.com/questions/23635662/editing-legend-text-labels-in-ggplot

Question 3a

#citations for how to remove the dollar sign, how to split columns into two, how to pivot long, label left justify,formatting numbers, correcting the x-axis labels (thank you to my colleague @Wes Scott) =

#https://www.statology.org/remove-dollar-sign-in-r/

#https://www.statology.org/split-column-in-r/

#https://stackoverflow.com/questions/30808474/r-data-wrangling-for-emails

#https://www.statology.org/rename-single-column-in-r/

#R for Data Science chapter 5: https://r4ds.hadley.nz/data-tidy.html

#https://www.statology.org/ggplot-title-position/

#https://www.geeksforgeeks.org/change-formatting-of-numbers-of-ggplot2-plot-axis-in-r/

Question 3b

#citations for how to remove the dollar sign, how to split columns into two, how to pivot long, label left justify and scale_x_date, also my colleague @WesScott, formatting numbers. =

#https://www.r-bloggers.com/2011/08/use-geom_rect-to-add-recession-bars-to-your-time-series-plots-rstats-ggplot/

#https://stackoverflow.com/questions/78249312/adding-recession-bands-to-ggplot-of-federal-interest-rate-the-taylor-rule-and-y

#https://sergiocorreia.github.io/fedplot/reference/geom_recessions.html

#https://r4ds.hadley.nz/data-tidy.html

#https://www.statology.org/ggplot-title-position/

#https://stackoverflow.com/questions/11748384/formatting-dates-on-x-axis-in-ggplot2

#https://www.geeksforgeeks.org/change-formatting-of-numbers-of-ggplot2-plot-axis-in-r/

Question 3c

#citations for label left justify, open circle shape, formatting numbers =

#https://www.r-bloggers.com/2011/08/use-geom_rect-to-add-recession-bars-to-your-time-series-plots-rstats-ggplot/

#https://stackoverflow.com/questions/78249312/adding-recession-bands-to-ggplot-of-federal-interest-rate-the-taylor-rule-and-y

#https://sergiocorreia.github.io/fedplot/reference/geom_recessions.html

#https://r4ds.hadley.nz/data-tidy.html

#https://www.statology.org/ggplot-title-position/

#https://rstudio.github.io/cheatsheets/html/data-visualization.html

#https://www.geeksforgeeks.org/change-formatting-of-numbers-of-ggplot2-plot-axis-in-r/

Question 4

#citation for how to draw layered donut charts = #https://www.geeksforgeeks.org/create-multiple-pie-charts-using-ggplot2-in-r/